library(tidybayes)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.32.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
library(bayesplot)
## This is bayesplot version 1.14.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
library(posterior)
## This is posterior version 1.6.1
## 
## Attaching package: 'posterior'
## 
## The following object is masked from 'package:bayesplot':
## 
##     rhat
## 
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## 
## The following objects are masked from 'package:base':
## 
##     %in%, match
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
library(bayesrules)
library(dplyr)
library(broom.mixed)
library(rstan)
## Loading required package: StanHeaders
## 
## rstan version 2.32.7 (Stan version 2.32.2)
## 
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## 
## 
## Attaching package: 'rstan'
## 
## The following objects are masked from 'package:posterior':
## 
##     ess_bulk, ess_tail
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
# Load Dataset
# WineQT.csv: one row per wine sample, physicochemical predictors plus an
# integer `quality` score (range 3-8 here, per summary(wine) below) and Id.
wine <- read.csv("WineQT.csv", header = TRUE)
# Fix the RNG seed so the Stan fits and sampling below are reproducible
set.seed(36501)

1. Bayesian Estimation

1.1 Discrete data (high-quality/low-quality Wines)

## Data Preparation (Defining the Discrete Outcome)
# A wine counts as "high quality" when its quality score is at least 7.
# as.numeric(TRUE/FALSE) produces the same 1/0 doubles as ifelse(cond, 1, 0).
high_quality_wines <- wine %>%
  mutate(Is_High_Quality = as.numeric(quality >= 7))

# Sample statistics for the Beta-Binomial model:
# N = number of trials, Y = number of observed "successes" (high-quality wines)
N <- nrow(high_quality_wines)
Y <- sum(high_quality_wines$Is_High_Quality)
cat("Total number of wines(N):", N, "\n")
## Total number of wines(N): 1143
cat("Number of high-quality wines (Y):", Y, "\n")
## Number of high-quality wines (Y): 159

Interpretation:

For the discrete parameter analysis, we focused on estimating θ(theta), the proportion of high-quality wines in the dataset. We transformed the quality variable (originally ranging from 3 to 9) into a binary outcome by defining wines with quality ≥ 7 as “high quality” (coded as 1) and wines below this threshold as “not high quality” (coded as 0).

1.1.1 Prior specification

# Model: Y ~ Binomial(N, theta), theta ~ Beta(alpha_0, beta_0)
# Beta(1, 1) is the flat/uniform prior on [0, 1]; with a Binomial likelihood
# it is conjugate, giving a closed-form Beta posterior (used in 1.1.2).
alpha_0 <- 1.0 
beta_0 <- 1.0

# Prior: θ ~ Beta(1, 1) (Uniform Prior)

Explanation:

We choose the Beta(1, 1) prior, which is a non-informative Uniform prior. This prior assigns equal probability density to all possible values of the proportion θ (from 0 to 1). It is the conjugate prior for the Binomial likelihood, guaranteeing a closed-form posterior solution.

1.1.2 Theoretical Posterior (Conjugate)

# Conjugate update: a Beta(alpha_0, beta_0) prior with Binomial(N, theta) data
# yields a Beta(alpha_0 + Y, beta_0 + N - Y) posterior.
alpha_N <- alpha_0 + Y
beta_N <- beta_0 + N - Y

# Closed-form posterior summary for theta (mean, SD, central 95% interval)
post_total <- alpha_N + beta_N
theoretical_summary <- data.frame(
  Mean = alpha_N / post_total,
  SD = sqrt(alpha_N * beta_N / (post_total^2 * (post_total + 1))),
  CI_Lower = qbeta(0.025, alpha_N, beta_N),
  CI_Upper = qbeta(0.975, alpha_N, beta_N)
)

print(theoretical_summary)
##       Mean         SD  CI_Lower  CI_Upper
## 1 0.139738 0.01024189 0.1202705 0.1603973

1.1.3 MCMC Estimation using stan()

# Stan program for the Beta-Binomial model. The string is passed verbatim to
# stan(); its prior (theta ~ Beta) and likelihood (Y ~ Binomial) mirror the
# conjugate analysis above, so the MCMC posterior should match Beta(alpha_N, beta_N).
stan_quality_model <- "
  data {
    int<lower=0> N;       
    int<lower=0> Y;        
    real<lower=0> alpha_0; 
    real<lower=0> beta_0;  
  }
  parameters {
    real<lower=0, upper=1> theta; 
  }
  model {
    theta ~ beta(alpha_0, beta_0);

    Y ~ binomial(N, theta);
  }
"
  
# Fit model: 4 chains x 10000 iterations (half warmup), so 20000 post-warmup
# draws in total. Data are bundled into a named list matching the Stan data block.
stan_data_disc <- list(N = N, Y = Y, alpha_0 = alpha_0, beta_0 = beta_0)
quality_stan_fit <- stan(
  model_code = stan_quality_model,
  data = stan_data_disc,
  chains = 4,
  iter = 10000,
  seed = 36501
)
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 7e-06 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 1:                0.028 seconds (Sampling)
## Chain 1:                0.05 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 2e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 2:                0.022 seconds (Sampling)
## Chain 2:                0.044 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 2e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 3:                0.022 seconds (Sampling)
## Chain 3:                0.044 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 2e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 4:                0.022 seconds (Sampling)
## Chain 4:                0.044 seconds (Total)
## Chain 4:
# Posterior summary table for theta (mean, quantiles, n_eff, Rhat)
print(quality_stan_fit, pars = "theta")
## Inference for Stan model: anon_model.
## 4 chains, each with iter=10000; warmup=5000; thin=1; 
## post-warmup draws per chain=5000, total post-warmup draws=20000.
## 
##       mean se_mean   sd 2.5%  25%  50%  75% 97.5% n_eff Rhat
## theta 0.14       0 0.01 0.12 0.13 0.14 0.15  0.16  6687    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Dec  9 20:28:50 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

1.1.4 MCMC Diagnostics

Trace Plot

# Trace plot: well-mixed chains should overlap and show no drift or trend
mcmc_trace(quality_stan_fit, pars = "theta")

Density Plot

# Density Plot
# Overlaid per-chain posterior densities; convergent chains agree closely
mcmc_dens_overlay(quality_stan_fit, pars = "theta")

Autocorrelation plot

# Autocorrelation should decay quickly toward zero with increasing lag
mcmc_acf(quality_stan_fit, pars = "theta")

R-hat values

# Potential scale reduction factor; values below ~1.01 indicate convergence
rhat_value_disc <- summary(quality_stan_fit, pars = "theta")$summary[, "Rhat"]
cat("R-hat value:", rhat_value_disc, "\n")
## R-hat value: 1.000607

R-hat = 1.00: Excellent convergence (< 1.01 threshold).

Effective sample size ratio

# Ratio of effective sample size to total post-warmup draws (4 x 5000 = 20000)
neff_ratio(quality_stan_fit, pars = "theta")
## [1] 0.3343606

The ESS ratio of 0.334 indicates that about one-third of our 20,000 post-warmup MCMC draws (4 chains × 5,000) are effectively independent, which is sufficient for reliable posterior inference.

1.1.5 Comparison: MCMC vs Theoretical Posterior

# Extract posterior samples.
# Use rstan::extract explicitly: tidyr also exports extract(), so a bare call
# depends on package load order (here rstan masks tidyr::extract, see the
# load-time conflict messages above) — qualifying removes that fragility.
theta_samples <- as.numeric(rstan::extract(quality_stan_fit, pars = "theta")$theta)

# Numerical comparison: closed-form Beta posterior vs MCMC draws
comparison_discrete <- data.frame(
  Method = c("Theoretical (Beta)", "MCMC (Stan)"),
  Mean = c(theoretical_summary$Mean, mean(theta_samples)),
  SD = c(theoretical_summary$SD, sd(theta_samples)),
  CI_2.5 = c(theoretical_summary$CI_Lower, as.numeric(quantile(theta_samples, 0.025))),
  CI_97.5 = c(theoretical_summary$CI_Upper, as.numeric(quantile(theta_samples, 0.975)))
)
print(comparison_discrete)
##               Method      Mean         SD    CI_2.5   CI_97.5
## 1 Theoretical (Beta) 0.1397380 0.01024189 0.1202705 0.1603973
## 2        MCMC (Stan) 0.1398676 0.01033205 0.1205440 0.1606909
# Graphical comparison: overlay the theoretical Beta density on the MCMC draws
theta_grid <- seq(0.05, 0.25, length.out = 1000) # Adjust range based on data
theoretical_density_disc <- dbeta(theta_grid, alpha_N, beta_N)

posterior_disc_plot <- ggplot() +
  geom_density(data = data.frame(theta = theta_samples), 
               aes(x = theta, fill = "MCMC"), alpha = 0.5) +
  # `size` for lines was deprecated in ggplot2 3.4.0; `linewidth` is the
  # current aesthetic (this removes the deprecation warning)
  geom_line(data = data.frame(theta = theta_grid, density = theoretical_density_disc),
            aes(x = theta, y = density, color = "Theoretical"), linewidth = 1.2) +
  scale_fill_manual(values = c("MCMC" = "steelblue")) +
  scale_color_manual(values = c("Theoretical" = "black")) +
  labs(title = "Posterior Distribution: MCMC vs Theoretical (High Quality Proportion)",
       x = "θ (Proportion of High Quality Wines)", y = "Density") +
  theme_minimal() +
  theme(legend.title = element_blank())
print(posterior_disc_plot)

Interpretation:

The close alignment between MCMC and theoretical results validates our MCMC implementation and confirms the accuracy of the sampling algorithm. This comparison is possible because the Beta-Binomial model has a closed-form conjugate posterior.

1.2 Continuous data (alcohol)

# Continuous outcome: alcohol content (% by volume)
alcohol_data <- wine$alcohol
summary(alcohol_data)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8.40    9.50   10.20   10.44   11.10   14.90

For continuous data, we chose alcohol (μ) as the parameter that we are interested in.

1.2.1 Prior specification

# Normal-Normal model for mean alcohol content mu, with sigma treated as known
mu_0 <- 10 # prior mean: typical wine alcohol is around 10%
sigma_0 <- 2 # prior SD: weakly informative
n <- length(alcohol_data) # sample size
y_bar <- mean(alcohol_data) # sample mean
sigma_known <- sd(alcohol_data) # sample SD, plugged in as the "known" sigma

Prior: μ ~ N(10, 2²)

Explanation: - μ₀ = 10: typical wine alcohol content is around 10%. - σ₀ = 2: weakly informative, placing roughly 95% prior probability on μ between 6% and 14% alcohol. - Based on general wine knowledge but allows data to dominate.

Assumed: σ = 1.082 (known, using sample SD)

1.2.2 Theoretical posterior (conjugate)

# Conjugate Normal-Normal update (sigma known): the posterior precision is the
# sum of prior and total data precision, and the posterior mean is the
# precision-weighted average of the prior mean and the sample mean.
tau_0 <- 1 / sigma_0^2       # prior precision
tau <- 1 / sigma_known^2     # per-observation data precision
post_precision <- tau_0 + n * tau
mu_n <- (tau_0 * mu_0 + n * tau * y_bar) / post_precision
sigma_n <- sqrt(1 / post_precision)

# Closed-form posterior summary for mu (mean, SD, central 95% interval)
data.frame(
  Mean = mu_n,
  SD = sigma_n,
  CI_Lower = qnorm(0.025, mu_n, sigma_n),
  CI_Upper = qnorm(0.975, mu_n, sigma_n)
)
##     Mean         SD CI_Lower CI_Upper
## 1 10.442 0.03200568 10.37927 10.50473

1.2.3 MCMC Estimation using stan()

# Stan program for the Normal-mean model with known sigma.
# NOTE: statement order inside the `model` block does not matter to Stan —
# the prior on mu and the likelihood each just add terms to the log density.
stan_model <- "
  data {
    int<lower=0> n;
    vector[n] Y;
    real mu_0;
    real<lower=0> sigma_0;
    real<lower=0> sigma;
  }
  parameters {
    real mu;
  }
  model {
    Y ~ normal(mu, sigma);
    mu ~ normal(mu_0, sigma_0);
  }
"
# Fit model: 4 chains x 10000 iterations (half warmup) = 20000 post-warmup
# draws. Data are bundled into a named list matching the Stan data block.
stan_data_cont <- list(
  n = n,
  Y = alcohol_data,
  mu_0 = mu_0,
  sigma_0 = sigma_0,
  sigma = sigma_known
)
stan_fit <- stan(
  model_code = stan_model,
  data = stan_data_cont,
  chains = 4,
  iter = 10000,
  seed = 36501
)
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 8e-06 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.049 seconds (Warm-up)
## Chain 1:                0.053 seconds (Sampling)
## Chain 1:                0.102 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.046 seconds (Warm-up)
## Chain 2:                0.049 seconds (Sampling)
## Chain 2:                0.095 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 5e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.051 seconds (Warm-up)
## Chain 3:                0.052 seconds (Sampling)
## Chain 3:                0.103 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.046 seconds (Warm-up)
## Chain 4:                0.047 seconds (Sampling)
## Chain 4:                0.093 seconds (Total)
## Chain 4:
# Posterior summary table for mu (mean, quantiles, n_eff, Rhat)
print(stan_fit, pars = "mu")
## Inference for Stan model: anon_model.
## 4 chains, each with iter=10000; warmup=5000; thin=1; 
## post-warmup draws per chain=5000, total post-warmup draws=20000.
## 
##     mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
## mu 10.44       0 0.03 10.38 10.42 10.44 10.46  10.5  7076    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Dec  9 20:30:11 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

1.2.4 MCMC Diagnostics

Trace Plot

# Trace plot: well-mixed chains should overlap and show no drift or trend
mcmc_trace(stan_fit, pars = "mu")

Interpretation: Chains mix well with no trends, indicating convergence.

Density Plot

# Overlaid per-chain posterior densities; convergent chains agree closely
mcmc_dens_overlay(stan_fit, pars = "mu")

Interpretation: All chains have similar distributions, confirming convergence.

Autocorrelation plot

# Autocorrelation should decay quickly toward zero with increasing lag
mcmc_acf(stan_fit, pars = "mu")

R-hat values

# Potential scale reduction factor; values below ~1.01 indicate convergence
summary(stan_fit, pars = "mu")$summary[, "Rhat"]
## [1] 1.000394

R-hat = 1.00: Excellent convergence (< 1.01 threshold).

Effective sample size ratio

# Ratio of effective sample size to total post-warmup draws (4 x 5000 = 20000)
neff_ratio(stan_fit, pars = "mu")
## [1] 0.3538086

An ESS ratio of 0.354 demonstrates highly efficient MCMC sampling with adequate independent samples for the continuous parameter μ.

1.2.5 Comparison: MCMC vs Theoretical Posterior

# Extract posterior samples.
# Use rstan::extract explicitly: tidyr also exports extract(), so a bare call
# depends on package load order (here rstan masks tidyr::extract, see the
# load-time conflict messages above) — qualifying removes that fragility.
posterior_samples <- as.numeric(rstan::extract(stan_fit, pars = "mu")$mu)

# Numerical comparison: closed-form Normal posterior vs MCMC draws
comparison <- data.frame(
  Method = c("Theoretical", "MCMC (Stan)"),
  Mean = c(mu_n, mean(posterior_samples)),
  SD = c(sigma_n, sd(posterior_samples)),
  CI_2.5 = c(qnorm(0.025, mu_n, sigma_n), 
             as.numeric(quantile(posterior_samples, 0.025))),
  CI_97.5 = c(qnorm(0.975, mu_n, sigma_n), 
              as.numeric(quantile(posterior_samples, 0.975)))
)

print(comparison)
##        Method     Mean         SD   CI_2.5  CI_97.5
## 1 Theoretical 10.44200 0.03200568 10.37927 10.50473
## 2 MCMC (Stan) 10.44175 0.03200521 10.37922 10.50482
# Graphical comparison: overlay the theoretical Normal density on the MCMC draws
mu_grid <- seq(10.3, 10.6, length.out = 1000)
theoretical <- dnorm(mu_grid, mu_n, sigma_n)

ggplot() +
  geom_density(data = data.frame(mu = posterior_samples), 
               aes(x = mu, fill = "MCMC"), alpha = 0.5) +
  # `size` for lines was deprecated in ggplot2 3.4.0; `linewidth` is the
  # current aesthetic (this removes the deprecation warning)
  geom_line(data = data.frame(mu = mu_grid, density = theoretical),
            aes(x = mu, y = density, color = "Theoretical"), linewidth = 1.2) +
  scale_fill_manual(values = c("MCMC" = "steelblue")) +
  scale_color_manual(values = c("Theoretical" = "black")) +
  labs(title = "Posterior: MCMC vs Theoretical",
       x = "μ (mean alcohol %)", y = "Density") +
  theme_minimal()

Interpretation:

The close alignment between MCMC and theoretical posterior validates our MCMC estimation and confirms the accuracy of the sampling algorithm. This comparison is possible because the Normal-Normal conjugate model has a closed-form posterior solution.

2. Bayesian Regression

2.1 Checking Priors

#Full model
# Inspect marginal summaries to motivate the priors: quality is centered near
# 5.66 with observed values 3-8, so N(5.5, 1.5^2) is a sensible intercept prior.
summary(wine) #Prior for intercept, quality centered at roughly 5.5 with 1.5 SE
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 4.600   Min.   :0.1200   Min.   :0.0000   Min.   : 0.900  
##  1st Qu.: 7.100   1st Qu.:0.3925   1st Qu.:0.0900   1st Qu.: 1.900  
##  Median : 7.900   Median :0.5200   Median :0.2500   Median : 2.200  
##  Mean   : 8.311   Mean   :0.5313   Mean   :0.2684   Mean   : 2.532  
##  3rd Qu.: 9.100   3rd Qu.:0.6400   3rd Qu.:0.4200   3rd Qu.: 2.600  
##  Max.   :15.900   Max.   :1.5800   Max.   :1.0000   Max.   :15.500  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 21.00       1st Qu.:0.9956  
##  Median :0.07900   Median :13.00       Median : 37.00       Median :0.9967  
##  Mean   :0.08693   Mean   :15.62       Mean   : 45.91       Mean   :0.9967  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 61.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :68.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.740   Min.   :0.3300   Min.   : 8.40   Min.   :3.000  
##  1st Qu.:3.205   1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.20   Median :6.000  
##  Mean   :3.311   Mean   :0.6577   Mean   :10.44   Mean   :5.657  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90   Max.   :8.000  
##        Id      
##  Min.   :   0  
##  1st Qu.: 411  
##  Median : 794  
##  Mean   : 805  
##  3rd Qu.:1210  
##  Max.   :1597
# Prior-predictive fit: prior_PD = TRUE samples from the priors only (the
# likelihood is ignored), so the draws below show what the priors imply about
# quality before seeing the data.
#   - prior_intercept: centered intercept ~ N(5.5, 1.5^2)
#   - prior: coefficients ~ N(0, 2.5^2), autoscaled by predictor SDs
#   - prior_aux: sigma ~ Exp(1), autoscaled
# NOTE(review): `quality ~ .` includes the `Id` index column as a predictor —
# likely unintended; consider `quality ~ . - Id`. Confirm before the data fit.
prior_full_wine_model <- stan_glm(
  quality~., data = wine, family = gaussian, 
  prior_intercept = normal(5.5, 1.5), 
  prior = normal(0,  2.5, autoscale = TRUE), 
  prior_aux = exponential(1, autoscale = TRUE), 
  chains = 4, iter = 2*5000, seed = 36501, prior_PD = TRUE) 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.014847 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 148.47 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.189 seconds (Warm-up)
## Chain 1:                0.193 seconds (Sampling)
## Chain 1:                0.382 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.217 seconds (Warm-up)
## Chain 2:                0.256 seconds (Sampling)
## Chain 2:                0.473 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.195 seconds (Warm-up)
## Chain 3:                0.203 seconds (Sampling)
## Chain 3:                0.398 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.185 seconds (Warm-up)
## Chain 4:                0.204 seconds (Sampling)
## Chain 4:                0.389 seconds (Total)
## Chain 4:
#Use weakly informative priors for the Coefficients and Sigma

#model relationship defined as
#
#     data:     Y_i | B0, B1,... B12, sigma ~ N(mu_i, sigma^2) with mu_i = B0 + B1*Xi_1 + ... + B12*Xi_12
#     priors:   B0_c ~ N(5.5, 1.5^2)
#               B1,...B12 ~ N(0, 2.5^2) #unassuming location and variance, weak
#               sigma ~ Exp(1)

## Density of the response 
# Predicted draws of quality from the prior model, one density per draw.
# tidybayes deprecated the `n` argument in favour of `ndraws` (the warning
# previously emitted here documents this rename).
wine |>
  add_predicted_draws(prior_full_wine_model, ndraws = 100) |>
  ggplot(aes(x = .prediction, group = .draw)) +
    geom_density() + 
    xlab("quality")

## Plot to show fitted draws from the prior model between quality and alcohol
# add_fitted_draws() is deprecated: use add_epred_draws(), which returns the
# expectation of the posterior predictive in the `.epred` column (was `.value`),
# and takes `ndraws` in place of `n`.
wine |>
  add_epred_draws(prior_full_wine_model, ndraws = 100) |>
  ggplot(aes(x = alcohol, y = .epred)) +
  geom_line(aes(group = .draw))
## Warning in fitted_draws.default(model = model, newdata = newdata, ..., n = n): `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
## - Use [add_]epred_draws() to get the expectation of the posterior predictive.
## - Use [add_]linpred_draws() to get the distribution of the linear predictor.
## - For example, you used [add_]fitted_draws(..., scale = "response"), which
##   means you most likely want [add_]epred_draws(...).
## NOTE: When updating to the new functions, note that the `model` parameter is now
##   named `object` and the `n` parameter is now named `ndraws`.

## Plot to show fitted draws from the prior model between quality and volatile.acidity
# Same deprecation fix as above: add_epred_draws()/ndraws/.epred replace
# add_fitted_draws()/n/.value.
wine |>
  add_epred_draws(prior_full_wine_model, ndraws = 100) |>
  ggplot(aes(x = volatile.acidity, y = .epred)) +
  geom_line(aes(group = .draw))
## Warning in fitted_draws.default(model = model, newdata = newdata, ..., n = n): `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
## - Use [add_]epred_draws() to get the expectation of the posterior predictive.
## - Use [add_]linpred_draws() to get the distribution of the linear predictor.
## - For example, you used [add_]fitted_draws(..., scale = "response"), which
##   means you most likely want [add_]epred_draws(...).
## NOTE: When updating to the new functions, note that the `model` parameter is now
##   named `object` and the `n` parameter is now named `ndraws`.

Both plots reflect the weak prior assumptions placed on the coefficients and sigma.

2.2 Update the model to the posterior

full_wine_model <- update(prior_full_wine_model, prior_PD = FALSE)
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 3.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.33 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.526 seconds (Warm-up)
## Chain 1:                0.958 seconds (Sampling)
## Chain 1:                1.484 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 2.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.564 seconds (Warm-up)
## Chain 2:                0.936 seconds (Sampling)
## Chain 2:                1.5 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.5e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.567 seconds (Warm-up)
## Chain 3:                0.955 seconds (Sampling)
## Chain 3:                1.522 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.535 seconds (Warm-up)
## Chain 4:                0.868 seconds (Sampling)
## Chain 4:                1.403 seconds (Total)
## Chain 4:
# Confirm the prior specification (autoscale = TRUE priors appear under
# "Adjusted prior" in the output); possibly unnecessary.
prior_summary(full_wine_model)
## Priors for model 'full_wine_model' 
## ------
## Intercept (after predictors centered)
##  ~ normal(location = 5.5, scale = 1.5)
## 
## Coefficients
##   Specified prior:
##     ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
##   Adjusted prior:
##     ~ normal(location = [0,0,0,...], scale = [ 1.15,11.21,10.24,...])
## 
## Auxiliary (sigma)
##   Specified prior:
##     ~ exponential(rate = 1)
##   Adjusted prior:
##     ~ exponential(rate = 1.2)
## ------
## See help('prior_summary.stanreg') for more details
summary(full_wine_model, probs = c(0.025, 0.975))
## 
## Model Info:
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      quality ~ .
##  algorithm:    sampling
##  sample:       20000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 1143
##  predictors:   13
## 
## Estimates:
##                        mean   sd    2.5%   97.5%
## (Intercept)           23.0   25.0 -25.4   72.4  
## fixed.acidity          0.0    0.0   0.0    0.1  
## volatile.acidity      -1.1    0.1  -1.4   -0.8  
## citric.acid           -0.1    0.2  -0.5    0.2  
## residual.sugar         0.0    0.0   0.0    0.1  
## chlorides             -1.7    0.5  -2.7   -0.7  
## free.sulfur.dioxide    0.0    0.0   0.0    0.0  
## total.sulfur.dioxide   0.0    0.0   0.0    0.0  
## density              -18.8   25.5 -69.0   30.7  
## pH                    -0.4    0.2  -0.9    0.0  
## sulphates              0.9    0.1   0.6    1.1  
## alcohol                0.3    0.0   0.2    0.3  
## Id                     0.0    0.0   0.0    0.0  
## sigma                  0.6    0.0   0.6    0.7  
## 
## Fit Diagnostics:
##            mean   sd   2.5%   97.5%
## mean_PPD 5.7    0.0  5.6    5.7    
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                      mcse Rhat n_eff
## (Intercept)          0.3  1.0   8292
## fixed.acidity        0.0  1.0   8423
## volatile.acidity     0.0  1.0  15462
## citric.acid          0.0  1.0  14361
## residual.sugar       0.0  1.0  12244
## chlorides            0.0  1.0  14915
## free.sulfur.dioxide  0.0  1.0  14351
## total.sulfur.dioxide 0.0  1.0  13117
## density              0.3  1.0   8174
## pH                   0.0  1.0   9884
## sulphates            0.0  1.0  16601
## alcohol              0.0  1.0   9358
## Id                   0.0  1.0  19295
## sigma                0.0  1.0  22015
## mean_PPD             0.0  1.0  21757
## log-posterior        0.0  1.0   7957
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

2.2.1 Check full model MCMC diagnostics

# Trace plots: check chain mixing and stationarity
mcmc_trace(full_wine_model)

# Overlaid per-chain densities: check agreement between chains
mcmc_dens_overlay(full_wine_model)

# Autocorrelation plots: check how quickly correlation between draws decays
mcmc_acf(full_wine_model)

Interpretation:

Diagnostics exhibit quick, well-mixing trace plots with approximately normal densities and low autocorrelation for the features. Rhat values show variance stability between and within chains, and effective sample size ratios between 0.4 and 1.1 for our coefficients, sigma, and intercept also point to low autocorrelation. These diagnostics point to trustworthy results from the MCMC.

Estimates from the posterior summary also show that, at the 95% level, only the volatile.acidity, chlorides, sulphates, and alcohol intervals lie entirely outside of 0. We’ll examine these in a reduced regression model using MCMC.

# Reduced model: keep only the predictors whose 95% posterior intervals
# excluded zero in the full model. Same weakly informative priors for the
# intercept, coefficients, and sigma as in the full model.
reduced_wine_model <- stan_glm(
  quality ~ volatile.acidity + chlorides + sulphates + alcohol,
  data            = wine,
  family          = gaussian,
  prior_intercept = normal(5.5, 1.5),
  prior           = normal(0, 2.5, autoscale = TRUE),
  prior_aux       = exponential(1, autoscale = TRUE),
  chains          = 4,
  iter            = 10000,  # 2 * 5000; first half discarded as warmup
  seed            = 36501
)
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 2.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.159 seconds (Warm-up)
## Chain 1:                0.539 seconds (Sampling)
## Chain 1:                0.698 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.158 seconds (Warm-up)
## Chain 2:                0.537 seconds (Sampling)
## Chain 2:                0.695 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.158 seconds (Warm-up)
## Chain 3:                0.521 seconds (Sampling)
## Chain 3:                0.679 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.2e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.159 seconds (Warm-up)
## Chain 4:                0.532 seconds (Sampling)
## Chain 4:                0.691 seconds (Total)
## Chain 4:
summary(reduced_wine_model, probs = c(0.025, 0.975))
## 
## Model Info:
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      quality ~ volatile.acidity + chlorides + sulphates + alcohol
##  algorithm:    sampling
##  sample:       20000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 1143
##  predictors:   5
## 
## Estimates:
##                    mean   sd   2.5%   97.5%
## (Intercept)       2.8    0.2  2.4    3.3   
## volatile.acidity -1.2    0.1 -1.4   -1.0   
## chlorides        -1.4    0.5 -2.4   -0.5   
## sulphates         0.8    0.1  0.6    1.1   
## alcohol           0.3    0.0  0.3    0.3   
## sigma             0.6    0.0  0.6    0.7   
## 
## Fit Diagnostics:
##            mean   sd   2.5%   97.5%
## mean_PPD 5.7    0.0  5.6    5.7    
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                  mcse Rhat n_eff
## (Intercept)      0.0  1.0  22396
## volatile.acidity 0.0  1.0  20878
## chlorides        0.0  1.0  17281
## sulphates        0.0  1.0  16955
## alcohol          0.0  1.0  21505
## sigma            0.0  1.0  24511
## mean_PPD         0.0  1.0  21338
## log-posterior    0.0  1.0   8933
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Effective sample size ratio (n_eff / total draws); values near or above 1
# indicate low autocorrelation between draws
neff_ratio(reduced_wine_model)
##      (Intercept) volatile.acidity        chlorides        sulphates 
##          1.11980          1.04390          0.86405          0.84775 
##          alcohol            sigma 
##          1.07525          1.22555

2.2.1 Check reduced MCMC diagnostics

# Trace, overlaid density, and autocorrelation diagnostics for the reduced model
mcmc_trace(reduced_wine_model)

mcmc_dens_overlay(reduced_wine_model)

mcmc_acf(reduced_wine_model)

2.2.2 Posterior Parameter sampling and comparison to data

# Posterior predictive check: density of observed quality vs. replicated datasets
pp_check(full_wine_model)

# Reduced model is closer to the data observations; multimodal traits of the
# data are due to the discrete values for quality.
pp_check(reduced_wine_model)

2.2.3 Predictions

# Posterior predictive draws for every observation, from each model
full_predictions <- posterior_predict(full_wine_model, newdata = wine)

reduced_predictions <- posterior_predict(reduced_wine_model, newdata = wine)

# 50% (inner) and 95% (outer) posterior predictive intervals vs. observed
# quality, plotted against volatile.acidity
ppc_intervals(y = wine$quality, yrep = full_predictions, x = wine$volatile.acidity, prob = 0.5, prob_outer = 0.95) + labs(x = 'Volatile.acidity' , y = 'Quality', title = 'Full Model')

ppc_intervals(y = wine$quality, yrep = reduced_predictions, x = wine$volatile.acidity, prob = 0.5, prob_outer = 0.95) + labs(x = 'Volatile.acidity' , y = 'Quality', title = 'Reduced Model')

The reduced model has posterior predictive interval coverage just as good as the full model's.

2.2.4 Cross validation

# 10-fold cross-validated posterior predictive accuracy for the full model
# (MAE, scaled MAE, and 50%/95% interval coverage)
full_wine_cv <- prediction_summary_cv(data = wine, model = full_wine_model, k = 10)
full_wine_cv$cv
##         mae mae_scaled within_50 within_95
## 1 0.4033039  0.6273625 0.5399542 0.9484058
# 10-fold cross-validated posterior predictive accuracy for the reduced model
reduced_wine_cv <- prediction_summary_cv(data = wine, model = reduced_wine_model, k = 10)
reduced_wine_cv$cv
##         mae mae_scaled within_50 within_95
## 1 0.4157651  0.6393778  0.531968 0.9440122

The MAE between predicted and actual quality is typically small at about 0.4 for both models, with scaled errors of about 0.62 for both, and coverage of about 0.53 for the 50% PPIs and 0.94 for the 95% PPIs for both models as well. Prediction coverage is good with small error.

#Expected Log-Predictive Densities for model comparison
# loo() estimates each model's expected log predictive density (ELPD) via
# PSIS-LOO cross-validation
loo_1 <- loo(full_wine_model)
loo_2 <- loo(reduced_wine_model)

# Raw ELPD point estimates (higher is better)
c(loo_1$estimates[1], loo_2$estimates[1])
## [1] -1122.175 -1128.180
# ELPD difference with its standard error; best model is listed first
loo_compare(loo_1, loo_2)
##                    elpd_diff se_diff
## full_wine_model     0.0       0.0   
## reduced_wine_model -6.0       5.9

The full model may have about 6 units more predictive power, with the true difference lying roughly within 11.8 units (2 SE) of the estimated difference. Given this small — and possibly zero — difference in predictive power, the smaller model is better.

2.2.5 Predictions at a particular set of predictor values

# Posterior predictive draws of quality at one fixed set of predictor values.
# Each posterior draw of the parameters gives one mu and one simulated y_new.
# Use n() rather than hard-coding 20000 so this stays correct if the number
# of posterior draws (chains * iter / 2) ever changes.
wine_new <- as.data.frame(reduced_wine_model) |>
  mutate(
    mu = `(Intercept)` + volatile.acidity * 0.7 + chlorides * 0.076 +
      sulphates * 0.56 + alcohol * 9.4,
    y_new = rnorm(n(), mu, sigma)
  )

# Observed wine(s) matching the fixed predictor values used above.
# NOTE(review): exact `==` comparisons on doubles are fragile in general;
# they match here, but dplyr::near() would be safer — confirm if data changes.
wine_actual_obs <- wine |>
  filter(volatile.acidity == 0.7, chlorides == 0.076, sulphates == 0.56, alcohol == 9.4) #Actual quality is 5

# Density of the simulated quality values at the fixed predictor setting,
# with the posterior predictive mean marked by a vertical line
wine_new |>
  ggplot(aes(x = y_new)) +
  geom_density() +
  geom_vline(xintercept = mean(wine_new$y_new))

# Summarize predictive accuracy against the observed quality of 5
wine_new |>
  summarise(mean = mean(y_new), sd = sd(y_new), error = 5 - mean(y_new), scaled_error = (5 - mean(y_new)) / sd(y_new))
##       mean        sd       error scaled_error
## 1 5.075293 0.6464516 -0.07529333   -0.1164717
#c
# BUG FIX: wine_new$y_new holds 20,000 draws for ONE fixed predictor setting,
# so matrix(wine_new$y_new, ncol = 1143) cannot form a valid draws x 1143
# yrep matrix (the recycling warning confirmed this). Use the posterior
# predictive matrix for the observed data, computed earlier with
# posterior_predict(), which already has one column per observation.
ppc_intervals(wine$quality, yrep = reduced_predictions, x = wine$volatile.acidity)
## Warning in matrix(wine_new$y_new, ncol = 1143): data length [20000] is not a
## sub-multiple or multiple of the number of rows [18]

#Model Predictions for some fixed predictor values accurately model the response. Average predicted error from the actual is small, as well as the scaled error.